home *** CD-ROM | disk | FTP | other *** search
- /* National Institute of Standards and Technology (NIST)
- /* National Computer System Laboratory (NCSL)
- /* Office Systems Engineering (OSE) Group
- /* ********************************************************************
- /* D I S C L A I M E R
- /* (March 8, 1989)
- /*
- /* There is no warranty for the NIST NCSL OSE SGML parser and/or the NIST
- /* NCSL OSE SGML parser validation suite. If the SGML parser and/or
- /* validation suite is modified by someone else and passed on, NIST wants
- /* the parser's recipients to know that what they have is not what NIST
- /* distributed, so that any problems introduced by others will not
- /* reflect on our reputation.
- /*
- /* Policies
- /*
- /* 1. Anyone may copy and distribute verbatim copies of the SGML source
- /* code as received in any medium.
- /*
- /* 2. Anyone may modify your copy or copies of SGML parser source code or
- /* any portion of it, and copy and distribute such modifications provided
- /* that all modifications are clearly associated with the entity that
- /* performs the modifications.
- /*
- /* NO WARRANTY
- /* ===========
- /*
- /* NIST PROVIDES ABSOLUTELY NO WARRANTY. THE SGML PARSER AND VALIDATION
- /* SUITE ARE PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
- /* EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
- /* THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS
- /* WITH YOU. SHOULD THE SGML PARSER OR VALIDATION SUITE PROVE DEFECTIVE,
- /* YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
- /*
- /* IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW WILL NIST BE LIABLE FOR
- /* DAMAGES, INCLUDING ANY LOST PROFITS, LOST MONIES, OR OTHER SPECIAL,
- /* INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
- /* INABILITY TO USE (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA
- /* BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY THIRD PARTIES OR A
- /* FAILURE OF THE PROGRAM TO OPERATE WITH PROGRAMS NOT DISTRIBUTED BY
- /* NIST) THE PROGRAM, EVEN IF YOU HAVE BEEN ADVISED OF THE POSSIBILITY OF
- /* SUCH DAMAGES, OR FOR ANY CLAIM BY ANY OTHER PARTY.
- */
-
- /***************************************************************/
- /* TITLE: SGML PARSER */
- /* SYSTEM: DTD PREPROCESSOR */
- /* SUBSYSTEM: PREPROCESSOR for DETERMINING */
- /* AMBIGUOUS CONTENT MODELS */
- /* SOURCE FILE: PREPROC.C */
- /* AUTHOR: Steven Lindeman */
- /* DATE CREATED: 05January1987 */
- /***************************************************************/
-
- /***************************************************************/
- /* PREPROC -- Takes a valid content model to be reduced or */
- /* simplified and tokenizes it for use with */
- /* determin.c */
- /***************************************************************/
- #include <stdio.h>
- #include <ctype.h>
- #include "detdefs.h"
- #include "detglbl.h"
-
- void preproc(expression,buffer)
- char expression[];
- ITEM buffer[];
- {
- int i,size,low,high,whocares;
-
- low = size = 0;
- high = -1;
- init();
- for (i=0; expression[i]!='\0'; i++)
- high++; /* set high */
- whocares = reduce(expression,low,high,START);
- tokenize(reducedexpr,buffer,&size);
- #ifdef JJJ
- printf("Expression is -> %s\n",expression);
- for (i=0; i<size; i++)
- printf("%02d ",buffer[i].itoken);
- printf("\n");
- #endif
- return;
- }
-
-
- /***********************************/
- /* INIT */
- /***********************************/
- void init()
- {
- int *iptr,i;
-
- symtabindx = 0;
- memset((char *)buffer, '\0', sizeof(buffer));
- for(i=0; i<BUFFSIZE; i++)
- reducedexpr[i] = '\0';
- index = 0;
- }
-
- /***********************************/
- /* TOKENIZE */
- /***********************************/
- void tokenize(expression,buffer,j)
- char expression[];
- ITEM buffer[];
- int *j;
- {
- int i,position;
- char name[NAMELEN+1];
-
- i=0;
-
- while(expression[i] != '\0') {
- switch(expression[i]) {
- case '(': /* GRPO */
- if (*j >= BUFFSIZE){
- printf("overflow in preproc()\n");
- exit(0);
- }
- buffer[(*j)++].itoken = GRPO;
- break;
- case ')': /* GRPC */
- if ((*j) >= BUFFSIZE){
- printf("overflow in preproc()\n");
- exit(0);
- }
- buffer[(*j)++].itoken = GRPC;
- i++; /* get next char */
- handleoi(expression,buffer,&i,j); /* tokenize occurr. ind. */
- break;
- case '&':
- case '|': /* AND, OR */
- if ((*j) >= BUFFSIZE){
- printf("overflow in preproc()\n");
- exit(0);
- }
- buffer[(*j)++].itoken = OR; /* AND must be converted to OR */
- break;
- case ',': /* SEQ */
- if ((*j) >= BUFFSIZE){
- printf("overflow in preproc()\n");
- exit(0);
- }
- buffer[(*j)++].itoken = SEQ;
- break;
- default: /* must be a element name */
- get_name(expression,&i,name); /* get element name */
- position = mysearch(name); /* find position */
- if ((*j) >= BUFFSIZE){
- printf("overflow in preproc()\n");
- exit(0);
- }
- buffer[(*j)++].itoken = position+17; /* tokenize name */
- handleoi(expression,buffer,&i,j); /* tokenize o.i. */
- break;
- }
- i++; /* next char */
- }
-
- return;
- }
-
- /*************************************/
- /* SEARCH */
- /*************************************/
- mysearch(name)
- char name[];
- {
- int found,position;
-
-
- found = FALSE;
- position=0;
- while ((position < symtabindx) && (found != TRUE)) /* search table linearly */
- if (strcmp(symtable[position].entry,name) == 0)
- found = TRUE; /* name was found */
- else
- position++;
- if (found == FALSE) { /* not found in table */
- strcpy(symtable[symtabindx].entry,name); /* add to table */
- symtabindx++; /* increment index */
- }
-
- return(position); /* position found */
- }
-
- /*************************************/
- /* GET_NAME */
- /*************************************/
- void get_name(expression,i,name)
- char expression[];
- int *i;
- char name[];
- {
- int j;
-
- j = 0;
-
- while(isvalid(expression[*i]))
- name[j++] = expression[(*i)++]; /* load name */
- /* i will point to the next char */
- name[j] = '\0'; /* null terminate */
-
- return;
- }
-
- /*************************************/
- /* HANDLEOI */
- /*************************************/
- void handleoi(expression,buffer,i,j)
- char expression[];
- ITEM buffer[];
- int *i,*j;
- {
- switch(expression[*i]) {
- case '?': /* OPT */
- buffer[(*j)++].itoken = OPT;
- break;
- case '*': /* REP */
- buffer[(*j)++].itoken = REP;
- break;
- case '+': /* PLUS */
- buffer[(*j)++].itoken = PLUS;
- break;
- default: /* no oi, must add REQ */
- buffer[(*j)++].itoken = REQ;
- (*i)--; /* unget char */
- break;
- }
-
- return;
- }
-
- /*********************************/
- /* REMOVE_BLANKS *****************/
- /*********************************/
/*
 * remove_blanks -- copy oldexpr into newexpr leaving out every ' '.
 *
 * Only the space character is dropped; all other characters are copied
 * in order.  newexpr is NUL-terminated and needs room for at most the
 * length of oldexpr plus the terminator.
 */
void remove_blanks(newexpr,oldexpr)
char newexpr[],oldexpr[];
{
    char *src,*dst;

    dst = newexpr;
    for (src = oldexpr; *src != '\0'; src++)
        if (*src != ' ')
            *dst++ = *src;              /* keep non-blank char */
    *dst = '\0';

    return;
}
-
- /*********************************/
- /* VALIDATE_EXPR *****************/
- /*********************************/
/*
 * validate_expr -- check that a content model uses only legal characters.
 *
 * Legal are the delimiters ( ) + * ? , | & and every character accepted
 * by isvalid().  On the first other character a two-line message with
 * the character and its 1-based position is printed and the program
 * exits with status 99.
 */
void validate_expr(expr)
char expr[];
{
    int pos;
    char ch;

    for (pos = 0; (ch = expr[pos]) != '\0'; pos++) {
        /* grouping, occurrence and connector characters */
        if (ch == '(' || ch == ')' || ch == '+' || ch == '*' ||
            ch == '?' || ch == ',' || ch == '|' || ch == '&')
            continue;

        /* name characters */
        if (isvalid(ch))
            continue;

        printf("Illegal character in content model\n");
        printf("Character -> '%c' , Character number %d\n",
               ch,pos+1);
        exit(99);
    }

    return;
}
-
- /********************************/
- /* GET_EXPR *********************/
- /********************************/
- get_expr(expr)
- char expr[];
- {
- char temp[BUFFSIZE];
- int length;
-
- length = get_string(temp,BUFFSIZE);
- strcpy(expr,temp); /* copy expr from temp */
- remove_blanks(expr,temp); /* remove blanks from expr */
- validate_expr(expr); /* validate expression */
-
- return(length);
- }
-
- /********************************/
- /* GET_STRING *******************/
- /********************************/
/*
 * get_string -- read one line from standard input into temp.
 *
 * temp : destination buffer.
 * size : capacity of temp in characters, terminator included.
 *
 * Reading stops at a newline (consumed, not stored), at end of file, or
 * once size-1 characters are stored; in the last case the rest of the
 * line stays unread.  temp is NUL-terminated.
 *
 * Returns the number of characters stored; both an empty line and end
 * of file give 0.  A size of 0 or less returns 0 without touching temp
 * (previously the terminator was written even into a zero-size buffer).
 */
int get_string(temp,size)
char temp[];
int size;
{
    int c,i;

    if (size <= 0)
        return(0);                      /* no room even for the NUL */

    i = 0;

    while (--size > 0 && (c=getchar()) != EOF && c != '\n')
        temp[i++] = c;
    temp[i] = '\0';

    return(i);
}
-
- /********************************/
- /* REDUCE ***********************/
- /********************************/
- reduce(expr,low,high,came_from)
- char expr[];
- int low,high;
- int came_from;
- {
- int i,scan,numstriped,level,reduced,startinsert;
- int currentoi,newoi,saveoi;
-
- reduced = FALSE;
- numstriped = 0;
- saveoi = currentoi = '1';
-
- while (reduced == FALSE) {
- level = 0;
- for (scan=low; scan<=high; scan++) { /* scan for connector at level==0 */
- switch(expr[scan]) {
- case '|':
- case '&':
- case ',':
- if (level==0) {
- startinsert = index;
- currentoi = reduce(expr,low,scan-1,CONNECTOR);
- reducedexpr[index++] = expr[scan];
- currentoi = reduce(expr,scan+1,high,CONNECTOR);
- currentoi = 1;
- reduced = TRUE;
- scan = high+1;
- if (numstriped > 0)
- replace_parens(startinsert,saveoi);
- }
- break;
- case '(':
- level++;
- break;
- case ')':
- level--;
- break;
- }
- }
- if (reduced == FALSE)
- if (expr[high] == ')' || expr[high-1] == ')' ) {
- if (expr[high] == ')' ) {
- newoi = '1';
- high--;
- }
- else
- if (expr[high-1] == ')' ) {
- newoi = expr[high];
- high = high-2;
- }
- low++;
- numstriped++;
- update_oi(¤toi,newoi);
- saveoi = currentoi;
- }
- else { /* must be terminal */
- switch(expr[high]) {
- case '+':
- case '?':
- case '*':
- newoi = expr[high--]; /* strip oi */
- break;
- default:
- newoi = '1';
- break;
- }
- update_oi(¤toi,newoi);
- for (i=low; i<=high; i++) /* add to reducedexpr */
- reducedexpr[index++] = expr[i];
- if (numstriped > 0 && came_from == START)
- replace_parens(0,currentoi);
- if (currentoi != '1' && came_from == CONNECTOR)
- reducedexpr[index++] = currentoi;
- reduced = TRUE;
- }
- }
- return(currentoi);
- }
- /**************************************/
- void replace_parens(insert,oi)
- int insert;
- char oi;
- {
- int i;
-
- for (i=index; i>insert; i--)
- reducedexpr[i] = reducedexpr[i-1];
- index++;
- reducedexpr[insert] = '(';
- reducedexpr[index++] = ')';
- if (oi != '1')
- reducedexpr[index++] = oi;
-
- return;
- }
-
- /*******************************************/
- /* UPDATEOI */
- /*******************************************/
/*
 * update_oi -- merge occurrence indicator newoi into *currentoi.
 *
 *   newoi '+' : *currentoi stays or becomes '+' if it was '+' or '1',
 *               otherwise it becomes '*'.
 *   newoi '?' : *currentoi stays or becomes '?' if it was '?' or '1',
 *               otherwise it becomes '*'.
 *   newoi '*' : *currentoi becomes '*'.
 *   any other newoi leaves *currentoi unchanged.
 */
void update_oi(currentoi, newoi)
int *currentoi;
char newoi;
{
    switch (newoi) {
        case '+':
            *currentoi = (*currentoi == '+' || *currentoi == '1') ? '+' : '*';
            break;
        case '?':
            *currentoi = (*currentoi == '?' || *currentoi == '1') ? '?' : '*';
            break;
        case '*':
            *currentoi = '*';
            break;
        default:                        /* nothing to merge */
            break;
    }
    return;
}
- /*******************************************/
- /* ISVALID */
- /*******************************************/
- int isvalid(c)
- char c;
- {
- switch (c) {
- case '.':
- case '-':
- case '#':
- return(TRUE);
- default:
- if(isalnum(c))
- return(TRUE);
- return(FALSE);
- }
- }
-